Paso 3

Análisis de clases latentes: modelo seleccionado sin predictores, caracterización de clases y medidas de ajuste

Andrés González Santa Cruz
April 28, 2023
Show code
script src = "https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js"
Show code
 // Click-to-zoom behaviour for every plot image in the rendered report.
 $(document).ready(function() {
    // Hidden overlay that will host the enlarged copy of a clicked image.
    $('body').prepend('<div class=\"zoomDiv\"><img src=\"\" class=\"zoomImg\"></div>');

    // Styling applied to the overlay when a plot is opened: fixed and
    // centered in the viewport, bordered, scrollable if taller than the screen.
    var zoomDivOpenCss = {
      opacity: '1',
      width: 'auto',
      border: '1px solid white',
      borderRadius: '5px',
      position: 'fixed',
      top: '50%',
      left: '50%',
      marginRight: '-50%',
      transform: 'translate(-50%, -50%)',
      boxShadow: '0px 0px 50px #888888',
      zIndex: '50',
      overflow: 'auto',
      maxHeight: '100%'
    };

    // Open: clicking any ordinary image loads its source into the overlay
    // at full width and shows the overlay.
    $('img:not(.zoomImg)').on('click', function() {
      $('.zoomImg').attr('src', $(this).attr('src')).css({width: '100%'});
      $('.zoomDiv').css(zoomDivOpenCss);
    });

    // Close: clicking the enlarged image collapses the overlay again.
    $('img.zoomImg').on('click', function() {
      $('.zoomDiv').css({opacity: '0', width: '0%'});
    });
  });
  
Show code
<script src="hideOutput.js"></script> 
Show code
// Adds "Show/Hide" toggle buttons to source, output, and plot chunks that
// carry the .fold class in the knitted document, and folds them on load.
$(document).ready(function() {    
    // NOTE(review): `$chunks` is assigned without var/let, creating an
    // implicit global — works here, but worth scoping if reused.
    $chunks = $('.fold');    
    $chunks.each(function () {      // add button to source code chunks     
    if ( $(this).hasClass('s') ) {       
        $('pre.r', this).prepend("<div class=\"showopt\">Show Source</div><br style=\"line-height:22px;\"/>");
            // Mark the code element as folded so it is hidden on load.
            $('pre.r', this).children('code').attr('class', 'folded');     
            }      // add button to output chunks     
        if ( $(this).hasClass('o') ) {       
            $('pre:not(.r)', this).has('code').prepend("<div class=\"showopt\">Show Output</div><br style=\"line-height:22px;\"/>");       
            $('pre:not(.r)', this).children('code:not(r)').addClass('folded');        // add button to plots       
            // Plots are wrapped in a <pre class="plot"> so they can get the
            // same button + fold treatment as code and output.
            $(this).find('img').wrap('<pre class=\"plot\"></pre>');       
            $('pre.plot', this).prepend("<div class=\"showopt\">Show Plot</div><br style=\"line-height:22px;\"/>");       
            $('pre.plot', this).children('img').addClass('folded');      
            }   
});    // hide all chunks when document is loaded   
    $('.folded').css('display', 'none')    // function to toggle the visibility   
    $('.showopt').click(function() {     
            var label = $(this).html();     
            // Flip the button label between "Show …" and "Hide …" per click.
            if (label.indexOf("Show") >= 0) {       
                $(this).html(label.replace("Show", "Hide"));     
            } else {
              $(this).html(label.replace("Hide", "Show"));     
            }     
    // Animate the sibling code/img element open or closed.
    $(this).siblings('code, img').slideToggle('fast', 'swing');   
    }); 
}); 

Cargamos los datos

Show code
rm(list = ls());gc()
         used (Mb) gc trigger (Mb) max used (Mb)
Ncells 535896 28.7    1209275 64.6   643711 34.4
Vcells 908248  7.0    8388608 64.0  1649632 12.6
Show code
load("data2_lca2_adj_2023_04_27.RData")

Cargamos los paquetes

Show code
# Knitr: echo all code chunks in the rendered document.
knitr::opts_chunk$set(echo = TRUE)

# Attach a package, installing it first if it is missing.
#
# BUG FIX: the original pattern `if(!require(pkg)){install.packages(pkg)}`
# installs a missing package but never attaches it, so the first knit on a
# fresh machine fails when the package's functions are called later. Here
# the package is attached immediately after installation.
#
# @param pkg        Package name (character scalar).
# @param installer  Function used to install the package; defaults to
#                   install.packages so non-CRAN packages can override it.
ensure_attached <- function(pkg, installer = install.packages) {
  if (!require(pkg, character.only = TRUE)) {
    installer(pkg)
    library(pkg, character.only = TRUE)
  }
}

# Same attach order as the original chunk (poLCAParallel right after poLCA
# so any masking of poLCA functions is preserved).
ensure_attached("poLCA")
ensure_attached("poLCAParallel",
                function(p) devtools::install_github("QMUL/poLCAParallel@package"))
ensure_attached("compareGroups")
ensure_attached("parallel")
ensure_attached("Hmisc")
ensure_attached("tidyverse")
# sjPlot occasionally fails to install on restricted platforms; keep optional.
try(ensure_attached("sjPlot"))
ensure_attached("emmeans")
ensure_attached("nnet")
ensure_attached("here")
ensure_attached("doParallel")
ensure_attached("progress")
ensure_attached("caret")
ensure_attached("rpart")
ensure_attached("rpart.plot")
ensure_attached("partykit")
ensure_attached("randomForest")
ensure_attached("ggcorrplot")
ensure_attached("polycor")
ensure_attached("tableone")
ensure_attached("broom")
ensure_attached("plotly")
ensure_attached("rsvg")
ensure_attached("DiagrammeRsvg")
ensure_attached("effects")

Seleccionar modelos finales

Sin resultado distal

Show code
# Reproducible ~30% random subsample exported as a training base.
set.seed(2125)
# Bernoulli draw per row flags ~30% of records. NOTE(review): the new
# column is named `nrow`, shadowing the base function name inside subset()'s
# data mask — it works, but renaming it would be safer.
mydata_preds3$nrow<-rbinom(n=nrow(mydata_preds3),size=1,prob=0.3)

# Export the flagged rows as a Stata file ("training base").
rio::export(subset(mydata_preds3, nrow==1), "base_entrenamiento.dta")
Show code
# If probs.start is NULL (the default) when calling poLCA, the function
# generates random starting values on each run. Repeated poLCA runs will
# therefore usually produce the same estimated parameters (the same maximum
# of the likelihood) but with the latent-class labels in a different order.

#https://drive.google.com/file/d/10njMh5JEcqaBgCnoZdJ1uk3uCEkocDez/view?usp=share_link
#http://daob.nl/wp-content/uploads/2015/07/ESRA-course-slides.pdf
#https://docs.google.com/document/d/1LVeDpAP6CfT3n8B6HhHcc_SRnzO0JBoT/edit

#bvr(ppio_newList$lc_entropy_best_model)

#A list of matrices of class-conditional response probabilities to be used as the starting values for the estimation algorithm. Each matrix in the list corresponds to one manifest variable, with one row for each latent class, and one column for each outcome. The default is NULL, producing random starting values. Note that if nrep>1, then any user-specified probs.start values are only used in the first of the nrep attempts.

#The poLCA.reorder function takes as its first argument the list of starting values probs.start, and as its second argument a vector describing the desired reordering of the latent classes.
# Relabel classes by decreasing estimated population share (P).
new.probs.start <-  poLCA.reorder(LCA_best_model_ppio$probs.start, order(LCA_best_model_ppio$P, decreasing = TRUE))
#new.probs.start <-poLCA.reorder(probs.start,c(4,1,3,2))
# Then run poLCA once more, this time using the reordered starting values.

#The argument nrep=5 tells the program to repeat nrep times, and keep the fit with the highest likelihood to try to avoid local maxima.

#.maxiter – The maximum number of iterations through which the estimation algorithm will cycle.
#.nrep - Number of times to estimate the model, using different values of probs.start. (default is one)
set.seed(2125)
# Refit the selected model (nclass taken from the previous step's fit);
# nrep=1000 random restarts guard against local maxima.
LCA_best_model_mod<-
   poLCA(f_preds, mydata_preds3, nclass=length(LCA_best_model_ppio$P), 
         maxiter=10000,tol=1e-5, na.rm=FALSE,nrep=1e3, verbose=TRUE, calc.se=TRUE,probs.start=new.probs.start) 
Conditional item response (column) probabilities,
 by outcome variable, for each class (row) 
 
$CAUSAL
          Pr(1)  Pr(2)  Pr(3)  Pr(4)
class 1:      0 0.3453 0.6547 0.0000
class 2:      0 0.0086 0.0000 0.9914
class 3:      0 0.9688 0.0312 0.0000
class 4:      0 0.1274 0.8726 0.0000
class 5:      0 0.0556 0.0000 0.9444
class 6:      0 0.0755 0.9245 0.0000
class 7:      0 0.7077 0.2923 0.0000

$EDAD_MUJER_REC
           Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)
class 1:  0.0000 0.0170 0.2899 0.3167 0.3139 0.0624
class 2:  0.0017 0.3681 0.2947 0.2092 0.1071 0.0192
class 3:  0.0006 0.0106 0.2416 0.4167 0.2945 0.0359
class 4:  0.0017 0.0414 0.2875 0.3261 0.2783 0.0651
class 5:  0.0000 0.1794 0.3665 0.2472 0.1770 0.0299
class 6:  0.0030 0.0017 0.0869 0.2979 0.5372 0.0734
class 7:  0.0684 0.0360 0.2950 0.3044 0.2476 0.0486

$PUEBLO_ORIGINARIO_REC
           Pr(1)  Pr(2)  Pr(3)
class 1:  0.1255 0.8373 0.0372
class 2:  0.1521 0.8030 0.0449
class 3:  0.0859 0.8644 0.0496
class 4:  0.1239 0.8184 0.0577
class 5:  0.1290 0.8337 0.0373
class 6:  0.1401 0.8599 0.0000
class 7:  1.0000 0.0000 0.0000

$PAIS_ORIGEN_REC
           Pr(1)  Pr(2)  Pr(3)
class 1:  0.0061 0.0000 0.9939
class 2:  0.0000 0.9922 0.0078
class 3:  0.0000 0.8801 0.1199
class 4:  0.0000 0.9888 0.0112
class 5:  0.0000 0.0000 1.0000
class 6:  0.0000 0.9267 0.0733
class 7:  0.0838 0.8285 0.0876

$HITO1_EDAD_GEST_SEM_REC
           Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)
class 1:  0.0077 0.0071 0.3264 0.3671 0.2250 0.0666
class 2:  0.0088 0.7795 0.2117 0.0000 0.0000 0.0000
class 3:  0.0754 0.2027 0.2229 0.4909 0.0000 0.0081
class 4:  0.0063 0.0000 0.3261 0.3623 0.2232 0.0821
class 5:  0.0202 0.7950 0.1848 0.0000 0.0000 0.0000
class 6:  0.0080 0.0040 0.6073 0.2573 0.1039 0.0195
class 7:  0.0500 0.0000 0.0914 0.2432 0.3096 0.3058

$MACROZONA
           Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)
class 1:  0.0000 0.6107 0.0844 0.0373 0.2359 0.0317
class 2:  0.0052 0.3843 0.1750 0.1448 0.0859 0.2048
class 3:  0.0000 0.3996 0.1653 0.1662 0.1128 0.1562
class 4:  0.0009 0.3191 0.1827 0.2115 0.0888 0.1970
class 5:  0.0060 0.5570 0.0803 0.0159 0.2994 0.0413
class 6:  0.0000 0.6958 0.1042 0.0628 0.0662 0.0709
class 7:  0.0311 0.1991 0.3145 0.1668 0.0996 0.1888

$PREV_TRAMO_REC
           Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)
class 1:  0.0048 0.0000 0.6095 0.2880 0.0977
class 2:  0.0017 0.0701 0.7223 0.2006 0.0053
class 3:  0.0000 0.0915 0.5565 0.3520 0.0000
class 4:  0.0008 0.0034 0.6437 0.3521 0.0000
class 5:  0.0300 0.0043 0.5214 0.1636 0.2808
class 6:  0.0000 0.7657 0.0353 0.1908 0.0082
class 7:  0.0276 0.0370 0.6457 0.2347 0.0550

Estimated class population shares 
 0.0948 0.1529 0.1902 0.3412 0.0437 0.1273 0.0498 
 
Predicted class memberships (by modal posterior prob.) 
 0.1082 0.1512 0.2114 0.3339 0.0433 0.1045 0.0475 
 
========================================================= 
Fit for 7 latent classes: 
========================================================= 
number of observations: 3789 
number of estimated parameters: 188 
residual degrees of freedom: 3601 
maximum log-likelihood: -26835.6 
 
AIC(7): 54047.2
BIC(7): 55220.29
G^2(7): 3742.787 (Likelihood ratio/deviance statistic) 
X^2(7): 38877.83 (Chi-square goodness of fit) 
 
Show code
# Text snapshot of the printed model, for later inclusion in reports.
output_LCA_best_model_mod<-capture.output(LCA_best_model_mod)
# One-row summary of fit statistics (log-likelihood, AIC, BIC, etc.).
glance_LCA_best_model_mod<-glance(LCA_best_model_mod)
# Analysis data augmented with predicted class memberships/posteriors.
mydata_preds_LCA1 <- augment(LCA_best_model_mod, data = mydata_preds3)
Show code
# fig.height=15, 
## Population shares of the classes (mean posterior probability, in %):
warning(paste("Probabilidades posteriores: ",
  paste(round(colMeans(LCA_best_model_mod$posterior)*100,2), collapse=", ")
  ))
## Estimated class memberships (modal assignment, in %).
## TYPO FIX in the emitted message: "Probabildiades" -> "Probabilidades".
warning(paste("Probabilidades predichas: ",
  paste(round(prop.table(table(LCA_best_model_mod$predclass)),4)*100, collapse=", ")
  ))

# Category labels used for plot tooltips. NOTE: "charactersitic" is the
# actual (misspelled) column name inside tabla12.xlsx — do not "fix" it here.
traductor_cats <- readxl::read_excel("tabla12.xlsx") %>% 
  dplyr::mutate(level=readr::parse_double(level)) %>% 
  dplyr::mutate(charactersitic=gsub(" \\(%\\)", "", charactersitic))



# Long-format item-response probabilities, joined with readable labels.
lcmodel <- reshape2::melt(LCA_best_model_mod$probs, level=2)
lcmodel<- lcmodel %>% 
  dplyr::mutate(pr=as.numeric(gsub("[^0-9.]+", "", Var2))) %>% 
  dplyr::left_join(traductor_cats[,c("charactersitic", "level", "CATEGORIA")], by= c("L2"="charactersitic", "pr"="level"))

# Tooltip text. TYPO FIX: "Categoria" -> "Categoría".
lcmodel$text_label<-paste0("Categoría:",lcmodel$CATEGORIA,"<br>%: ",scales::percent(lcmodel$value))


# Stacked-bar chart of conditional response probabilities, faceted by class.
zp1 <- ggplot(lcmodel,aes(x = L2, y = value, fill = Var2, label=text_label))
zp1 <- zp1 + geom_bar(stat = "identity", position = "stack")
zp1 <- zp1 + facet_grid(Var1 ~ .) 
zp1 <- zp1 + scale_fill_brewer(type="seq", palette="Greys", na.value = "white") +theme_bw()
# TYPO FIX in the legend title: "Cateorías" -> "Categorías".
zp1 <- zp1 + labs(y = "Porcentaje de probabilidad de respuesta", 
                  x = "",
                  fill ="Categorías de\nRespuesta")
zp1 <- zp1 + theme( axis.text.y=element_blank(),
                    axis.ticks.y=element_blank(),                    
                    panel.grid.major.y=element_blank())
zp1 <- zp1 + guides(fill = guide_legend(reverse=TRUE))
zp1 <- zp1 + theme(axis.text.x = element_text(angle = 30, hjust = 1))
#print(zp1)

# Interactive version with category/percentage tooltips.
ggplotly(zp1, tooltip = c("text_label"))%>% layout(xaxis= list(showticklabels = T),height=600, width=800)

Figure 1: Selected Model

Show code
# Save the stacked-bar figure at print quality.
ggsave("_fig3_LCA_distribuciones.png",zp1, dpi= 600)
# BUG FIX: re-knitting aborted with "File already exists!" because the
# xlsx writer refuses to clobber an existing file by default; forward
# overwrite = TRUE to openxlsx via rio::export's `...`.
lcmodel %>% rio::export("variables_probabilities_in_category.xlsx", overwrite = TRUE)
Error in saveWorkbook(wb, file = file, overwrite = overwrite): File already exists!
Show code
#In this case, residuals are actual cell counts vs. expected cell counts. 
# Bivariate residuals for the selected model — bvr() presumably comes from
# poLCAParallel (TODO confirm); flags pairwise local dependence.
bvr_LCA_best_model_mod<-bvr(LCA_best_model_mod)

#conditional probabilities
#Pr(B1=1|Class 3)
# Posterior class-membership probabilities plus the modal class assignment.
posteriors <- data.frame(LCA_best_model_mod$posterior, predclass=LCA_best_model_mod$predclass) 

# Classification table: total posterior mass per true class, grouped by the
# modally assigned class; the diagonal is the correctly captured mass.
classification_table <- plyr::ddply(posteriors, "predclass", function(x) colSums(x[,1:length(LCA_best_model_ppio$P)]))
# Overall classification error = 1 - correctly-classified mass / total mass.
# (NOTE(review): variable name is misspelled — "clasification".)
clasification_errors<- 1 - sum(diag(as.matrix(classification_table[,2:(length(LCA_best_model_ppio$P)+1)]))) / sum(classification_table[,2:(length(LCA_best_model_ppio$P)+1)]) 

warning(paste("Error de clasificación: ", round(clasification_errors,2)))
Warning: Error de clasificación: 0.11
Show code
# Naive Shannon entropy: sum(-p * log(p)). Produces NaN when any p == 0
# (0 * -Inf); the "safe"/"brutal" variants below handle that case.
entropy_alt <- function(p) sum(-p * log(p))
# Entropy of the estimated prior class shares.
error_prior <- entropy_alt(LCA_best_model_mod$P) # Class proportions
# Mean row-wise posterior entropy; na.rm=T silently drops rows that yield
# NaN (any zero posterior), which can bias this average — hence the
# alternative entropy implementations below.
error_post <- mean(apply(LCA_best_model_mod$posterior, 1, entropy_alt),na.rm=T)
# Relative entropy reduction (entropy-based pseudo-R²; 1 = perfect separation).
R2_entropy_alt <- (error_prior - error_post) / error_prior
warning(paste("Entropía: ", round(R2_entropy_alt,2)))
Warning: Entropía: 0.86
Show code
#https://stackoverflow.com/questions/72783185/entropy-calculation-gives-nan-is-applying-na-omit-a-valid-tweak
# Zero-safe Shannon entropy: entries with p == 0 contribute 0 (the limit of
# -p*log(p) as p -> 0) instead of the NaN a naive sum(-p*log(p)) produces.
# Errors on any value outside [0, 1].
entropy.safe <- function (p) {
  if (any(p > 1 | p < 0)) stop("probability must be between 0 and 1")
  contrib <- numeric(length(p))
  nonzero <- which(p != 0)
  contrib[nonzero] <- -p[nonzero] * log(p[nonzero])
  sum(contrib)
}

# Entropy-based pseudo-R² using the zero-safe entropy (no rows dropped).
error_prior2 <- entropy.safe(LCA_best_model_mod$P) # Class proportions
error_post2 <- mean(apply(LCA_best_model_mod$posterior, 1, entropy.safe),na.rm=T)
R2_entropy_safe <- (error_prior2 - error_post2) / error_prior2
warning(paste("Entropía segura: ", round(R2_entropy_safe,2)))
Warning: Entropía segura: 0.83
Show code
# "Brutal" Shannon entropy: compute -p*log(p) for every entry (NaN wherever
# p == 0) and let na.rm = TRUE discard those terms — equivalent to
# sum(na.omit(-p * log(p))). Errors on any value outside [0, 1].
entropy.brutal <- function (p) {
  if (any(p > 1 | p < 0)) stop("probability must be between 0 and 1")
  terms <- -p * log(p)
  sum(terms, na.rm = TRUE)
}

# Same pseudo-R² using the NaN-dropping entropy; should match entropy.safe
# results exactly (NaN terms correspond to zero probabilities).
error_prior3 <- entropy.brutal(LCA_best_model_mod$P) # Class proportions
error_post3 <- mean(apply(LCA_best_model_mod$posterior, 1, entropy.brutal),na.rm=T)
R2_entropy_brutal <- (error_prior3 - error_post3) / error_prior3
warning(paste("Entropía brutal: ", round(R2_entropy_brutal,2)))
Warning: Entropía brutal: 0.83
Show code
#https://gist.github.com/daob/c2b6d83815ddd57cde3cebfdc2c267b3
# Entropy R² via entropy.R2() — presumably provided by poLCAParallel,
# following Oberski's formulation in the gist above (TODO confirm source).
warning(paste("Entropía (solución Oberski): ", round(entropy.R2(LCA_best_model_mod),2)))
Warning: Entropía (solución Oberski): 0.83
Show code
#\#minimum average posterior probability of cluster membership (\>0.7), interpretability (classes are clearly distinguishable), and parsimony (each class has a sufficient sample size for further analysis; n≥50).


Ver si la exclusión de casos que no calzan en alguna clase tiene consecuencias.


Show code
#To evaluate whether the exclusion of cases would bias the LCA results, a sensitivity analysis was carried out. We conducted T-Tests and Wilcoxon–Mann–Whitney tests (for non-parametric data) to compare included and excluded records in terms of demographic and clinical background characteristics and baseline pain scores (all 638 patients completed pain intensity, frequency and impairment scales).
# Line plot of estimated item-response probabilities per class, one facet
# per manifest variable.
tidy(LCA_best_model_mod) %>% 
ggplot(aes(outcome, estimate, color = factor(class), group = class)) +
  geom_line() +
  facet_wrap(~variable, nrow = 4) +
  # BUG FIX: theme_bw() is a COMPLETE theme — placed after the partial
  # theme() call (as in the original), it silently discarded the rotated
  # x-axis labels. Apply the complete theme first, then the tweaks.
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "bottom")
Modelo seleccionado

Figure 2: Modelo seleccionado

Show code
tidy(LCA_best_model_mod) %>% rio::export("variables_class_estimate.xlsx")
Error in saveWorkbook(wb, file = file, overwrite = overwrite): File already exists!

Con resultado distal

Show code
# Relabel the adjusted (distal-outcome) model's classes by decreasing
# population share, mirroring the unadjusted model above.
new.probs.start_adj <-  poLCA.reorder(LCA_best_model_adj_ppio$probs.start, 
                                      order(LCA_best_model_adj_ppio$P, decreasing = TRUE))
set.seed(2125)
# Refit the selected model including the distal outcome (formula f_adj),
# with 1000 random restarts to guard against local maxima.
LCA_best_model_adj_mod<-
   poLCA(f_adj, mydata_preds3, nclass=length(LCA_best_model_adj_ppio$P), 
         maxiter=10000,tol=1e-5, na.rm=FALSE,nrep=1e3, verbose=TRUE, calc.se=TRUE,probs.start=new.probs.start_adj) 
Conditional item response (column) probabilities,
 by outcome variable, for each class (row) 
 
$CAUSAL
          Pr(1)  Pr(2)  Pr(3)  Pr(4)
class 1:      0 0.1320 0.8680 0.0000
class 2:      0 0.0094 0.0000 0.9906
class 3:      0 0.9731 0.0269 0.0000
class 4:      0 0.0747 0.9253 0.0000
class 5:      0 0.3813 0.6187 0.0000
class 6:      0 0.7470 0.2530 0.0000

$EDAD_MUJER_REC
           Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)
class 1:  0.0017 0.0463 0.3003 0.3206 0.2643 0.0667
class 2:  0.0014 0.3290 0.3111 0.2186 0.1182 0.0218
class 3:  0.0007 0.0117 0.2421 0.4157 0.2944 0.0355
class 4:  0.0035 0.0000 0.1023 0.3158 0.5047 0.0738
class 5:  0.0000 0.0175 0.3057 0.3071 0.3136 0.0560
class 6:  0.0683 0.0342 0.2965 0.3124 0.2506 0.0381

$PUEBLO_ORIGINARIO_REC
           Pr(1)  Pr(2)  Pr(3)
class 1:  0.1289 0.8069 0.0642
class 2:  0.1478 0.8087 0.0435
class 3:  0.0846 0.8659 0.0494
class 4:  0.1385 0.8615 0.0000
class 5:  0.1311 0.8324 0.0365
class 6:  1.0000 0.0000 0.0000

$PAIS_ORIGEN_REC
           Pr(1)  Pr(2)  Pr(3)
class 1:  0.0000 0.9678 0.0322
class 2:  0.0000 0.7795 0.2205
class 3:  0.0000 0.8768 0.1232
class 4:  0.0000 0.9226 0.0774
class 5:  0.0084 0.0000 0.9916
class 6:  0.0853 0.8245 0.0902

$HITO1_EDAD_GEST_SEM_REC
           Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)
class 1:  0.0014 0.0000 0.2863 0.3711 0.2446 0.0965
class 2:  0.0109 0.7838 0.2052 0.0000 0.0000 0.0000
class 3:  0.0746 0.2052 0.2292 0.4856 0.0000 0.0054
class 4:  0.0160 0.0046 0.6077 0.2574 0.0994 0.0150
class 5:  0.0090 0.0148 0.3182 0.3823 0.2174 0.0583
class 6:  0.0578 0.0000 0.0861 0.2475 0.2964 0.3122

$MACROZONA
           Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)
class 1:  0.0009 0.2834 0.1820 0.2320 0.0904 0.2113
class 2:  0.0054 0.4219 0.1551 0.1171 0.1318 0.1686
class 3:  0.0000 0.3981 0.1631 0.1681 0.1120 0.1587
class 4:  0.0000 0.6803 0.1170 0.0600 0.0713 0.0713
class 5:  0.0000 0.6318 0.0836 0.0139 0.2522 0.0185
class 6:  0.0329 0.1872 0.3354 0.1628 0.0986 0.1831

$PREV_TRAMO_REC
           Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)
class 1:  0.0012 0.0115 0.6462 0.3410 0.0000
class 2:  0.0070 0.0561 0.6791 0.1935 0.0643
class 3:  0.0012 0.0872 0.5608 0.3508 0.0000
class 4:  0.0000 0.5705 0.1720 0.2507 0.0069
class 5:  0.0067 0.0000 0.6030 0.2764 0.1139
class 6:  0.0243 0.0405 0.6497 0.2304 0.0551

Estimated class population shares 
 0.3135 0.1948 0.1902 0.1679 0.0864 0.0472 
 
Predicted class memberships (by modal posterior prob.) 
 0.3091 0.1937 0.2096 0.1396 0.1019 0.0462 
 
========================================================= 
Fit for 6 latent classes: 
========================================================= 
2 / 1 
            Coefficient  Std. error  t value  Pr(>|t|)
(Intercept)    -1.73952     0.15665  -11.105         0
outcome1        1.50423     0.16853    8.926         0
========================================================= 
3 / 1 
            Coefficient  Std. error  t value  Pr(>|t|)
(Intercept)    -0.93385     0.14463   -6.457         0
outcome1        0.55944     0.14811    3.777         0
========================================================= 
4 / 1 
            Coefficient  Std. error  t value  Pr(>|t|)
(Intercept)    -2.30154     0.29701   -7.749         0
outcome1        1.94604     0.27783    7.004         0
========================================================= 
5 / 1 
            Coefficient  Std. error  t value  Pr(>|t|)
(Intercept)    -1.99732     0.21671   -9.217         0
outcome1        0.88526     0.22140    3.998         0
========================================================= 
6 / 1 
            Coefficient  Std. error  t value  Pr(>|t|)
(Intercept)    -3.00748     0.43176   -6.966     0.000
outcome1        1.34115     0.43506    3.083     0.002
========================================================= 
number of observations: 3789 
number of estimated parameters: 166 
residual degrees of freedom: 3623 
maximum log-likelihood: -26897.5 
 
AIC(6): 54127
BIC(6): 55162.82
X^2(6): 39536.84 (Chi-square goodness of fit) 
 
ALERT: estimation algorithm automatically restarted with new initial values 
 
Show code
# Text snapshot of the printed adjusted model, for later reporting.
output_LCA_best_model_adj_mod<-capture.output(LCA_best_model_adj_mod)
# One-row summary of fit statistics for the adjusted model.
glance_LCA_best_model_adj_mod<-glance(LCA_best_model_adj_mod)
# Analysis data augmented with the adjusted model's class memberships.
mydata_preds_LCA2 <- augment(LCA_best_model_adj_mod, data = mydata_preds3)
Show code
# fig.height=15, 
## Population shares of the classes (mean posterior probability, in %):
warning(paste("Probabilidades posteriores: ",
  paste(round(colMeans(LCA_best_model_adj_mod$posterior)*100,2), collapse=", ")
  ))
## Estimated class memberships (modal assignment, in %).
## TYPO FIX in the emitted message: "Probabildiades" -> "Probabilidades".
warning(paste("Probabilidades predichas: ",
  paste(round(prop.table(table(LCA_best_model_adj_mod$predclass)),4)*100, collapse=", ")
  ))

# Long-format item-response probabilities for the adjusted model, joined
# with readable labels (traductor_cats built earlier in this report).
lcmodel_adj <- reshape2::melt(LCA_best_model_adj_mod$probs, level=2)
lcmodel_adj<- lcmodel_adj %>% 
  dplyr::mutate(pr=as.numeric(gsub("[^0-9.]+", "", Var2))) %>% 
  dplyr::left_join(traductor_cats[,c("charactersitic", "level", "CATEGORIA")], by= c("L2"="charactersitic", "pr"="level"))

# Tooltip text. TYPO FIX: "Categoria" -> "Categoría".
lcmodel_adj$text_label<-paste0("Categoría:",lcmodel_adj$CATEGORIA,"<br>%: ",scales::percent(lcmodel_adj$value))


# Stacked-bar chart of conditional response probabilities, faceted by class.
zp2 <- ggplot(lcmodel_adj,aes(x = L2, y = value, fill = Var2, label=text_label))
zp2 <- zp2 + geom_bar(stat = "identity", position = "stack")
zp2 <- zp2 + facet_grid(Var1 ~ .) 
zp2 <- zp2 + scale_fill_brewer(type="seq", palette="Greys", na.value = "white") +theme_bw()
# TYPO FIX in the legend title: "Cateorías" -> "Categorías".
zp2 <- zp2 + labs(y = "Porcentaje de probabilidad de respuesta", 
                  x = "",
                  fill ="Categorías de\nRespuesta")
zp2 <- zp2 + theme( axis.text.y=element_blank(),
                    axis.ticks.y=element_blank(),                    
                    panel.grid.major.y=element_blank())
zp2 <- zp2 + guides(fill = guide_legend(reverse=TRUE))
zp2 <- zp2 + theme(axis.text.x = element_text(angle = 30, hjust = 1))
#print(zp2)

# Interactive version with category/percentage tooltips.
ggplotly(zp2, tooltip = c("text_label"))%>% layout(xaxis= list(showticklabels = T),height=600, width=800)

Figure 3: Selected Model

Show code
# Save the adjusted-model figure at print quality.
ggsave("_fig3_LCA_distribuciones_adj.png",zp2, dpi= 600)

# BUG FIX: re-knitting aborted with "File already exists!"; forward
# overwrite = TRUE to the xlsx writer via rio::export's `...`.
lcmodel_adj %>% rio::export("variables_probabilities_in_category_adj.xlsx", overwrite = TRUE)
Error in saveWorkbook(wb, file = file, overwrite = overwrite): File already exists!
Show code
#In this case, residuals are actual cell counts vs. expected cell counts. 
# Bivariate residuals for the adjusted model.
bvr_LCA_best_model_adj_mod<-bvr(LCA_best_model_adj_mod)

#conditional probabilities
#Pr(B1=1|Class 3)
# Posterior class-membership probabilities plus modal assignment (adjusted).
posteriors_adj <- data.frame(LCA_best_model_adj_mod$posterior, 
                             predclass=LCA_best_model_adj_mod$predclass) 

# BUG FIX: the original passed `posteriors` — the UNADJUSTED 7-class
# model's table — so the adjusted classification error was computed from
# the wrong model (and silently dropped its 7th posterior column). Use
# posteriors_adj, built just above.
classification_table_adj <- plyr::ddply(posteriors_adj, "predclass", function(x) colSums(x[,1:length(LCA_best_model_adj_mod$P)]))
# Overall classification error = 1 - correctly-classified mass / total mass.
clasification_errors_adj<- 1 - sum(diag(as.matrix(classification_table_adj[,2:(length(LCA_best_model_adj_mod$P)+1)]))) / sum(classification_table_adj[,2:(length(LCA_best_model_adj_mod$P)+1)]) 

warning(paste("Error de clasificación: ", round(clasification_errors_adj,2)))
Warning: Error de clasificación: 0.1
Show code
# Naive Shannon entropy (redefined; NaN if any p == 0 — see safe variants).
entropy_alt <- function(p) sum(-p * log(p))
# Entropy of the adjusted model's prior class shares.
error_prior_adj <- entropy_alt(LCA_best_model_adj_mod$P) # Class proportions
# Mean row-wise posterior entropy; na.rm=T drops NaN rows (zero posteriors),
# which can bias the average.
error_post_adj <- mean(apply(LCA_best_model_adj_mod$posterior, 1, entropy_alt),na.rm=T)
# Entropy-based pseudo-R² for the adjusted model.
R2_entropy_alt_adj <- (error_prior_adj - error_post_adj) / error_prior_adj
warning(paste("Entropía: ", round(R2_entropy_alt_adj,2)))
Warning: Entropía: 0.83
Show code
#https://stackoverflow.com/questions/72783185/entropy-calculation-gives-nan-is-applying-na-omit-a-valid-tweak
# Zero-safe Shannon entropy: p == 0 contributes 0 instead of NaN.
entropy.safe <- function (p) {
  if (any(p > 1 | p < 0)) stop("probability must be between 0 and 1")
  log.p <- numeric(length(p))
  safe <- p != 0
  log.p[safe] <- log(p[safe])
  sum(-p * log.p)
}

# Entropy-based pseudo-R² for the adjusted model, using the safe entropy.
error_prior2_adj <- entropy.safe(LCA_best_model_adj_mod$P) # Class proportions
error_post2_adj <- mean(apply(LCA_best_model_adj_mod$posterior, 1, entropy.safe),na.rm=T)
R2_entropy_safe_adj <- (error_prior2_adj - error_post2_adj) / error_prior2_adj
# BUG FIX: the original printed R2_entropy_safe — the UNADJUSTED model's
# value — instead of the adjusted one computed just above.
warning(paste("Entropía segura: ", round(R2_entropy_safe_adj,2)))
Warning: Entropía segura: 0.83
Show code
#https://gist.github.com/daob/c2b6d83815ddd57cde3cebfdc2c267b3
# Entropy R² for the adjusted model via entropy.R2() — presumably from
# poLCAParallel, per the gist above (TODO confirm source).
warning(paste("Entropía (solución Oberski): ", round(entropy.R2(LCA_best_model_adj_mod),2)))
Warning: Entropía (solución Oberski): 0.79
Show code
#\#minimum average posterior probability of cluster membership (\>0.7), interpretability (classes are clearly distinguishable), and parsimony (each class has a sufficient sample size for further analysis; n≥50).


Ver si la exclusión de casos que no calzan en alguna clase tiene consecuencias.


Show code
#To evaluate whether the exclusion of cases would bias the LCA results, a sensitivity analysis was carried out. We conducted T-Tests and Wilcoxon–Mann–Whitney tests (for non-parametric data) to compare included and excluded records in terms of demographic and clinical background characteristics and baseline pain scores (all 638 patients completed pain intensity, frequency and impairment scales).
# Line plot of estimated item-response probabilities per class (adjusted
# model), one facet per manifest variable.
tidy(LCA_best_model_adj_mod) %>% 
ggplot(aes(outcome, estimate, color = factor(class), group = class)) +
  geom_line() +
  facet_wrap(~variable, nrow = 4) +
  # BUG FIX: theme_bw() is a COMPLETE theme — placed after the partial
  # theme() call (as in the original), it silently discarded the rotated
  # x-axis labels. Apply the complete theme first, then the tweaks.
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1),
        legend.position = "bottom")
Modelo seleccionado

Figure 4: Modelo seleccionado

Show code
tidy(LCA_best_model_adj_mod) %>% rio::export("variables_class_estimate_adj.xlsx")
Error in saveWorkbook(wb, file = file, overwrite = overwrite): File already exists!
Show code
# fig.cap= "Probabilidad predicha de presentar interrupción del embarazo por clase",
# Compute the effects summary ONCE — the original invoked
# effects::allEffects() three times (for prob, lower.prob and upper.prob),
# repeating the same expensive computation.
eff_outcome <- effects::allEffects(LCA_best_model_adj_mod, se = TRUE, confint = TRUE)$outcome

# Class-membership probabilities with 95% CIs, by outcome level, rendered
# as a markdown table (rows = classes, columns = outcome levels).
cbind(eff_outcome$prob, eff_outcome$lower.prob, eff_outcome$upper.prob) %>% 
    data.frame() %>% 
    dplyr::mutate_all(~round(.,2)) %>% 
    # Format each class as "estimate(95%CI=lower,upper)".
    dplyr::mutate(
      prob_c1 = paste0(prob.X1, "(95%CI=", L.prob.X1, ",", U.prob.X1, ")"),
      prob_c2 = paste0(prob.X2, "(95%CI=", L.prob.X2, ",", U.prob.X2, ")"),
      prob_c3 = paste0(prob.X3, "(95%CI=", L.prob.X3, ",", U.prob.X3, ")"),
      prob_c4 = paste0(prob.X4, "(95%CI=", L.prob.X4, ",", U.prob.X4, ")"),
      prob_c5 = paste0(prob.X5, "(95%CI=", L.prob.X5, ",", U.prob.X5, ")"),
      prob_c6 = paste0(prob.X6, "(95%CI=", L.prob.X6, ",", U.prob.X6, ")")
    ) %>% 
    dplyr::select(starts_with("prob_c")) %>% 
  t() %>% data.table::data.table(keep.rownames=TRUE) %>% 
  dplyr::mutate(rn=gsub("prob_c","",rn)) %>% 
  knitr::kable("markdown", caption="Probabilidad de pertenecer a una clase, según interrupción del embarazo", col.names = c("Clase","No interrumpe", "Interrumpe"))
Table 1: Probabilidad de pertenecer a una clase, según interrupción del embarazo
Clase No interrumpe Interrumpe
1 0.18(95%CI=0.15,0.22) 0.18(95%CI=0.17,0.19)
2 0.21(95%CI=0.18,0.25) 0.27(95%CI=0.25,0.28)
3 0.24(95%CI=0.2,0.27) 0.28(95%CI=0.26,0.29)
4 0.15(95%CI=0.13,0.19) 0.14(95%CI=0.13,0.16)
5 0.12(95%CI=0.1,0.15) 0.09(95%CI=0.08,0.1)
6 0.09(95%CI=0.07,0.11) 0.05(95%CI=0.04,0.06)
Show code
#########################################################
#########################################################
### Posterior probability calculation                 ###
### Assign class based on maximum probability         ###
###   Note: additional prep for Table1 package        ###
###         1) Convert all categorical variables to   ###
###            factors                                ###
###         2) Continuous variables as numeric        ###
###         3) Pull out number from census strings    ###
#########################################################
#########################################################
Show code
save.image("data2_lca3.RData")
Show code
# Use library() for a hard dependency: require() only returns FALSE on
# failure instead of erroring.
library(tidyverse)
# Capture session details (platform, attached package versions, sources).
sesion_info <- devtools::session_info()
dplyr::select(
  tibble::as_tibble(sesion_info$packages),
  c(package, loadedversion, source)
) %>% 
  # BUG FIX: the original headers ('Variable', 'Percentage') were copied
  # from another table; the displayed columns are the package name, the
  # loaded version, and the installation source.
  DT::datatable(filter = 'top', colnames = c('Row number' =1,'Package' = 2, 'Version'= 3, 'Source' = 4),
              caption = htmltools::tags$caption(
        style = 'caption-side: top; text-align: left;',
        '', htmltools::em('Packages')),
      options=list(
initComplete = htmlwidgets::JS(
        "function(settings, json) {",
        "$(this.api().tables().body()).css({
            'font-family': 'Helvetica Neue',
            'font-size': '50%', 
            'code-inline-font-size': '15%', 
            'white-space': 'nowrap',
            'line-height': '0.75em',
            'min-height': '0.5em'
            });",#;
        "}")))